home *** CD-ROM | disk | FTP | other *** search
- // Listing 4 - Program to Count field value frequencies
-
- #include <stdlib.h>
- #include <string.h>
- #include <cstring.h>
- #include <iomanip.h>
- #include <fstream.h>
- #include <algo.h>
- #include <map.h>
- #include <vector.h>
-
// Counter wrapper around a long that always starts at zero when
// default-constructed, so a freshly created map entry begins with count 0.
struct long_t {
    long v;                 // the running count

    long_t() { v = 0; }
};
-
- typedef map< string, long_t, less<string> > map_t;
- struct ct_t { string str; long_t count; };
- typedef vector< ct_t > vec_t;
-
- static inline int show( const map_t::value_type& x ) {
- cout << setw(7) << x.second.v << " - " << x.first << endl;
- return 0;
- }
-
- static inline bool order( const ct_t& x, const ct_t& y )
- { return x.count.v > y.count.v; }
-
- static inline int show_v( const ct_t& x ) {
- cout << setw(7) << x.count.v << " - " << x.str << endl;
- return 0;
- }
-
// Field-selection state. main() fills these in from the command line and
// reads each input line into buf before calling field().
static size_t fld_pos = 1;        // -p: 1-based start column (first is 1)
static size_t fld_num = 0;        // -f: 1-based delimited field number; 0 = not used
static size_t fld_len = 0;        // -l: maximum field length; 0 = no limit
static char fld_delim = '\0';     // -d: field delimiter; '\0' = none
static bool freq_sort = false;    // -s: also report ordered by count
static char buf[1024];            // current input line, edited in place by field()

// Assess the options and return the field to be counted.
//
// Locates the selected field inside buf and NUL-terminates it in place:
//   * fld_num != 0 : the field starts after (fld_num - 1) occurrences of
//                    fld_delim. main() refuses -f without -d, so fld_delim
//                    is expected to be non-zero here.
//   * otherwise    : the field starts at column fld_pos (a position of 0 is
//                    treated like 1).
// The field then ends after fld_len characters when fld_len is set;
// failing that, at the next fld_delim when one is set; failing that, at
// the end of the line. As in the original listing, a length takes
// precedence over the delimiter.
//
// Returns a pointer into buf. When the requested field or column lies
// beyond the end of the line, the result is an empty string (the line's
// own terminator), never a pointer outside the current line.
char * field() {

    char * const eol = buf + strlen( buf );    // terminating NUL of the line
    char * fld = buf;

    if ( fld_num ) {                           // field number specified
        for ( size_t n = 1; fld && n < fld_num; ++n ) {
            fld = strchr( fld, fld_delim );
            if ( fld ) ++fld;                  // step over the delimiter
        }
        if ( !fld ) return eol;                // line has too few fields
    }
    else if ( fld_pos > 1 ) {                  // column position specified
        const size_t skip = fld_pos - 1;
        // Never start past the text of this line: bytes beyond the
        // terminator belong to previously read lines.
        if ( skip >= static_cast<size_t>( eol - buf ) ) return eol;
        fld = buf + skip;
    }

    if ( fld_len ) {
        // Cut relative to the field start, and only when the field is
        // actually longer than the limit (keeps the write inside the line).
        if ( fld_len < static_cast<size_t>( eol - fld ) )
            fld[ fld_len ] = '\0';
    }
    else if ( fld_delim ) {
        char * const dp = strchr( fld, fld_delim );
        if ( dp ) *dp = '\0';
    }
    return fld;
} // field
-
- // main
-
- int main( int argc, char * argv[] ) {
-
- if ( argc == 1 ) {
- cout <<
- "Usage: count [option...] filename\n"
- "Options specify field for value count:\n"
- " -pn or -fn Position n or field number n. Default -p1\n"
- " -dc Fields delimited with c. Default is to\n"
- " specify fields by position and length.\n"
- " -ln Length of field. Default use rest of line\n"
- " -s Additional report sorted by frequency.\n"
- "Example: count -f4 -d, input.dat" << endl;
- return EXIT_FAILURE;
- } // no arguments
-
- for ( ; argc > 2 && *argv[1] == '-'; ++argv, --argc ) {
- if ( *(argv[1]+1) == 'p' ) fld_pos = atoi( argv[1]+2 );
- else if ( *(argv[1]+1) == 'f' ) fld_num = atoi( argv[1]+2 );
- else if ( *(argv[1]+1) == 'd' ) fld_delim = *( argv[1]+2 );
- else if ( *(argv[1]+1) == 'l' ) fld_len = atoi( argv[1]+2 );
- else if ( *(argv[1]+1) == 's' ) freq_sort = true;
- else { cerr <<"unknown option" <<endl; return EXIT_FAILURE; }
- } // each option
-
- if ( fld_num && !fld_delim )
- { cerr << "-d required when -f specified" << endl;
- return EXIT_FAILURE; }
-
- ifstream in ( argv[1] );
- if ( !in ) { cerr << "Can't open " << argv[1] << endl;
- return EXIT_FAILURE; }
-
- map_t ct_map;
-
- while ( in.getline( buf, sizeof( buf ) ) ) ++ct_map[field()].v;
-
- cout << "Ordered by string:" << endl;
- for_each( ct_map.begin(), ct_map.end(), ptr_fun( show ) );
-
- if ( freq_sort ) {
- vec_t ct_vec;
- ct_vec.reserve( ct_map.size() );
-
- for ( map_t::iterator it = ct_map.begin();
- it != ct_map.end(); ++it ) {
- ct_t ct;
- ct.str = (*it).first;
- ct.count = (*it).second;
- ct_vec.push_back( ct );
- }
-
- // Use sort() until apparent bug in stable_sort() is resolved
- // stable_sort( ct_vec.begin(), ct_vec.end(), ptr_fun(order) );
- sort( ct_vec.begin(), ct_vec.end(), ptr_fun(order) );
-
- cout << "Ordered by count:" << endl;
- for_each( ct_vec.begin(), ct_vec.end(), ptr_fun( show_v ) );
- } // end freq_sort
-
- return EXIT_SUCCESS;
- } // end main
-